1.6. Main Regression Analysis
1.6.1. Regression models for AUC
1.6.1.1. Additive model with main variables (dose, route of administration, blood sampling method)
log(AUC) ~ dose_g_per_kg + route + site
fit_AUC_mod <- lm(log(AUC) ~ dose_g_per_kg + route + site, order_0_df) #+food
fit_AUC_submod <- lm(log(AUC) ~ 1, order_0_df)
#broom::tidy(fit_AUC_mod, conf.int = TRUE)
#broom::tidy(fit_AUC_submod, conf.int = TRUE)
summ(fit_AUC_mod, confint = TRUE, digits = 3)
## MODEL INFO:
## Observations: 96 (50 missing obs. deleted)
## Dependent Variable: log(AUC)
## Type: OLS linear regression
##
## MODEL FIT:
## F(3,92) = 102.707, p = 0.000
## R² = 0.770
## Adj. R² = 0.763
##
## Standard errors:OLS
## ---------------------------------------------------------------
## Est. 2.5% 97.5% t val. p
## ------------------- -------- -------- -------- -------- -------
## (Intercept) 4.167 3.874 4.461 28.200 0.000
## dose_g_per_kg 1.544 0.934 2.155 5.021 0.000
## routePO 0.085 -0.093 0.262 0.947 0.346
## sitevenous -0.342 -0.441 -0.243 -6.875 0.000
## ---------------------------------------------------------------
#summary(fit_AUC_mod)
#summary(fit_AUC_submod)
anova(fit_AUC_mod, fit_AUC_submod)
## Analysis of Variance Table
##
## Model 1: log(AUC) ~ dose_g_per_kg + route + site
## Model 2: log(AUC) ~ 1
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 92 3.3115
## 2 95 14.4023 -3 -11.091 102.71 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
check_model(fit_AUC_mod)
ggpredict(fit_AUC_mod, terms = c("dose_g_per_kg", "route", "site"), ci_level = 0) %>% plot(show_data = TRUE, log_y = FALSE, jitter = .01) + labs(subtitle = "Adjusted R-squared: 0.7626")
## Model has log-transformed response. Back-transforming predictions to
## original response scale. Standard errors are still on the transformed
## scale.
plot_coefs(fit_AUC_mod, omit.coefs = NULL, scale = TRUE, ci_level = 0.95, plot.distributions = FALSE, inner_ci_level = .1, colors = "CUD") + labs(title = "Regression coeffecients with 95% CI")
1.6.1.2. Additive model with additional variables
log(AUC) ~ dose_g_per_kg + route + site + sex
fit_AUC_mod_add <- lm(log(AUC) ~ dose_g_per_kg + route + site + sex, order_0_df) #age cannot be added
summ(fit_AUC_mod, confint = TRUE, digits = 3)
## MODEL INFO:
## Observations: 96 (50 missing obs. deleted)
## Dependent Variable: log(AUC)
## Type: OLS linear regression
##
## MODEL FIT:
## F(3,92) = 102.707, p = 0.000
## R² = 0.770
## Adj. R² = 0.763
##
## Standard errors:OLS
## ---------------------------------------------------------------
## Est. 2.5% 97.5% t val. p
## ------------------- -------- -------- -------- -------- -------
## (Intercept) 4.167 3.874 4.461 28.200 0.000
## dose_g_per_kg 1.544 0.934 2.155 5.021 0.000
## routePO 0.085 -0.093 0.262 0.947 0.346
## sitevenous -0.342 -0.441 -0.243 -6.875 0.000
## ---------------------------------------------------------------
summ(fit_AUC_mod_add, confint = TRUE, digits = 3)
## MODEL INFO:
## Observations: 72 (74 missing obs. deleted)
## Dependent Variable: log(AUC)
## Type: OLS linear regression
##
## MODEL FIT:
## F(4,67) = 39.615, p = 0.000
## R² = 0.703
## Adj. R² = 0.685
##
## Standard errors:OLS
## ---------------------------------------------------------------
## Est. 2.5% 97.5% t val. p
## ------------------- -------- -------- -------- -------- -------
## (Intercept) 3.144 2.407 3.880 8.523 0.000
## dose_g_per_kg 3.154 1.797 4.510 4.640 0.000
## routePO 0.116 -0.080 0.311 1.180 0.242
## sitevenous -0.341 -0.535 -0.147 -3.512 0.001
## sexM -0.068 -0.311 0.175 -0.556 0.580
## ---------------------------------------------------------------
# summary(fit_AUC_mod)
# summary(fit_AUC_mod_add)
#anova(fit_AUC_mod, fit_AUC_mod_add) Anova does not work with variable sex
check_model(fit_AUC_mod_add)
ggpredict(fit_AUC_mod_add, terms = c("dose_g_per_kg", "route", "site", "sex")) %>% plot(show_data = TRUE, log_y = F, jitter = .01) #+ labs(subtitle = "Adjusted R-squared: 0.7626")
## Model has log-transformed response. Back-transforming predictions to
## original response scale. Standard errors are still on the transformed
## scale.
1.6.1.3. Model with interactions
log(AUC) ~ dose_g_per_kg*route + site
It is logical that AUC will depend on the combination of
dose and method of administration.
fit_AUC_mod_int <- lm(log(AUC) ~ dose_g_per_kg*route + site, order_0_df)
# fit_AUC_mod_int2 <- lm(log(AUC) ~ dose_g_per_kg + route*site, order_0_df)
# fit_AUC_mod_int3 <- lm(log(AUC) ~ dose_g_per_kg*site + route, order_0_df)
# fit_AUC_mod_int4 <- lm(log(AUC) ~ dose_g_per_kg*site*route, order_0_df)
summ(fit_AUC_mod_int, confint = TRUE, digits = 3)
## MODEL INFO:
## Observations: 96 (50 missing obs. deleted)
## Dependent Variable: log(AUC)
## Type: OLS linear regression
##
## MODEL FIT:
## F(4,91) = 95.676, p = 0.000
## R² = 0.808
## Adj. R² = 0.799
##
## Standard errors:OLS
## -----------------------------------------------------------------------
## Est. 2.5% 97.5% t val. p
## --------------------------- -------- -------- -------- -------- -------
## (Intercept) 4.520 4.204 4.837 28.365 0.000
## dose_g_per_kg 0.672 -0.023 1.367 1.921 0.058
## routePO -1.079 -1.649 -0.509 -3.762 0.000
## sitevenous -0.358 -0.449 -0.266 -7.791 0.000
## dose_g_per_kg:routePO 2.107 1.118 3.096 4.233 0.000
## -----------------------------------------------------------------------
anova(fit_AUC_mod_int, fit_AUC_submod)
## Analysis of Variance Table
##
## Model 1: log(AUC) ~ dose_g_per_kg * route + site
## Model 2: log(AUC) ~ 1
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 91 2.7667
## 2 95 14.4023 -4 -11.636 95.676 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
check_model(fit_AUC_mod_int)
ggpredict(fit_AUC_mod_int, terms = c("dose_g_per_kg", "route", "site"), ci_level = 0) %>% plot(show_data = TRUE, log_y = FALSE, jitter = .01) + labs(subtitle = "Adjusted R-squared: 0.799") + xlab("Dose (g per kg)")
## Model has log-transformed response. Back-transforming predictions to
## original response scale. Standard errors are still on the transformed
## scale.
plot_coefs(fit_AUC_mod_int, omit.coefs = NULL, scale = TRUE, ci_level = 0.95, plot.distributions = FALSE, inner_ci_level = .1, colors = "CUD") + labs(title = "Regression coeffecients with 95% CI")
plot_model(fit_AUC_mod_int, show.values = TRUE, show.intercept = TRUE, value.offset = 0.3, axis.labels = c("Dose (g per kg)", "Route (PO)", "Blood sampling (venous)", "Dose (g per kg) x Route (PO)", "Intercept"), title = "Regression coeffecients with 95% CI") +
theme_nice()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the sjPlot package.
## Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Coclusions:
Including the interaction between dose_g_per_kg and
route increases \(R^2\) by
about 3%.
1.6.2. Regression models for Tmax
order_0_df <- order_0_df %>%
mutate(bmi = ifelse(is.na(bmi) & !is.na(wt_kg) & !is.na(bh_cm),
wt_kg/(bh_cm/100)^2, bmi),
bsa_m2 = ifelse(is.na(bsa_m2) & !is.na(wt_kg) & !is.na(bh_cm),
sqrt((bh_cm * wt_kg)/3600), bsa_m2))
t_max_full_m <- lm(Tmax ~ dose_g_per_kg * food + route + wt_kg, data = order_0_df)
summary(t_max_full_m)
##
## Call:
## lm(formula = Tmax ~ dose_g_per_kg * food + route + wt_kg, data = order_0_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.900 -10.838 1.341 4.782 42.309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 253.9999 102.2243 2.485 0.02303 *
## dose_g_per_kg -165.4839 145.4369 -1.138 0.27011
## foodfed -247.9988 129.1429 -1.920 0.07080 .
## routePO -48.0073 12.3665 -3.882 0.00109 **
## wt_kg -0.4782 0.4216 -1.134 0.27153
## dose_g_per_kg:foodfed 433.3209 221.8195 1.953 0.06648 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.64 on 18 degrees of freedom
## (122 пропущенных наблюдений удалены)
## Multiple R-squared: 0.6062, Adjusted R-squared: 0.4969
## F-statistic: 5.542 on 5 and 18 DF, p-value: 0.002916
check_model(t_max_full_m)
ggResidpanel::resid_xpanel(t_max_full_m)
ggResidpanel::resid_panel(t_max_full_m, plots = c("lev", "cookd"))
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggResidpanel package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Estimating model coefficients using sandwich estimator
t_max_coef <- coeftest(t_max_full_m, vcov. = vcovHC, type = "HC3") %>%
broom::tidy(conf.int = TRUE) %>%
mutate(term = c("Intercept",
"Dose (g/kg)",
"Food status: \"Fed\"",
"Peroral route",
"Weight, kg",
"Interaction between\ndose and status \"fed\"")) %>%
rename("Variable" = term,
"β estimate" = estimate,
"Standard error" = std.error,
"Wald t-test statistics" = statistic,
"P-value" = p.value,
"95%CI low" = conf.low,
"95%CI high" = conf.high)
t_max_coef %>%
mutate(across(where(is.numeric), ~round(.x, 3))) %>%
flextable()
Variable | β estimate | Standard error | Wald t-test statistics | P-value | 95%CI low | 95%CI high |
|---|---|---|---|---|---|---|
Intercept | 254.000 | 162.510 | 1.563 | 0.135 | -87.421 | 595.421 |
Dose (g/kg) | -165.484 | 233.769 | -0.708 | 0.488 | -656.615 | 325.647 |
Food status: "Fed" | -247.999 | 211.833 | -1.171 | 0.257 | -693.044 | 197.046 |
Peroral route | -48.007 | 14.871 | -3.228 | 0.005 | -79.249 | -16.765 |
Weight, kg | -0.478 | 0.675 | -0.709 | 0.488 | -1.896 | 0.939 |
Interaction between | 433.321 | 353.804 | 1.225 | 0.236 | -309.993 | 1,176.635 |
t_max_prediction_plot <- fortify(t_max_full_m)
## Warning: `fortify(<lm>)` was deprecated in ggplot2 4.0.0.
## ℹ Please use `broom::augment(<lm>)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(t_max_prediction_plot, aes(x = .fitted, y = Tmax)) +
geom_point(size = 2) +
ylim(c(25,130)) +
xlim(c(25,130)) +
geom_abline(slope = 1, intercept = 0, color = "skyblue3",
linetype = "dashed", linewidth = 2) +
geom_text(x = 75, y = 130, label = "Adjusted R-squared = 0.497", size = 8) +
labs(title = "Predicted vs observed Tmax values",
x = "Fitted values of Tmax, min",
y = "Observed values of Tmax, min")
t_max_coef %>%
ggplot(aes(y = `Variable`)) +
geom_point(aes(x = `β estimate`), size = 3) +
geom_errorbar(aes(xmin = `95%CI low`, xmax = `95%CI high`), width = 0.5) +
geom_vline(xintercept = 0, color = "darkred") +
labs(title = "Coefficients and 95%CI in Tmax model")
1.6.3. Regression models for Cmax
log(Cmax) ~ dose_g_per_kg + route + food + site
order_0_df_factor <- order_0_df %>%
mutate(
route = factor(route),
food = factor(food),
site = factor(site)
)
Cmax_lm2_v1 <- lm(log(Cmax) ~ dose_g_per_kg + route + food + site, data = order_0_df_factor)
summary(Cmax_lm2_v1)
##
## Call:
## lm(formula = log(Cmax) ~ dose_g_per_kg + route + food + site,
## data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48366 -0.10779 0.01501 0.10903 0.36264
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.60571 0.12810 -4.728 5.43e-06 ***
## dose_g_per_kg 0.57351 0.26664 2.151 0.0332 *
## routePO 0.11544 0.07734 1.493 0.1378
## foodfed -0.94209 0.07201 -13.083 < 2e-16 ***
## sitevenous 0.77382 0.06083 12.720 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1645 on 141 degrees of freedom
## Multiple R-squared: 0.8369, Adjusted R-squared: 0.8322
## F-statistic: 180.8 on 4 and 141 DF, p-value: < 2.2e-16
check_model(Cmax_lm2_v1)
broom::tidy(Cmax_lm2_v1, conf.int = TRUE)
## # A tibble: 5 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.606 0.128 -4.73 5.43e- 6 -0.859 -0.352
## 2 dose_g_per_kg 0.574 0.267 2.15 3.32e- 2 0.0464 1.10
## 3 routePO 0.115 0.0773 1.49 1.38e- 1 -0.0375 0.268
## 4 foodfed -0.942 0.0720 -13.1 4.18e-26 -1.08 -0.800
## 5 sitevenous 0.774 0.0608 12.7 3.61e-25 0.654 0.894
# analysis of model linearity using Residuals vs Fitted plot
autoplot(Cmax_lm2_v1, 1, ncol = 1)
# Linearity diagnosis with ggResidpanel library
resid_panel(Cmax_lm2_v1, plots = "resid", smoother = TRUE)
## `geom_smooth()` using formula = 'y ~ x'
# Diagnosis of homoscedasticity of residuals using the Scale-Location plot
autoplot(Cmax_lm2_v1, 3, ncol = 1)
# Diagnosing the normality of residual distribution using a Normal Q-Q plot:
resid_panel(Cmax_lm2_v1, plots = c("qq", "hist"))
# Diagnosis of outliers and anomalous observations using the Residuals vs Leverage plot and Cook's distance plot
resid_panel(Cmax_lm2_v1, plots = c("lev", "cookd"))
log(Cmax) ~ dose_g_per_kg + route + food + site + sex + age
order_0_df_factor <- order_0_df %>%
mutate(
route = factor(route),
food = factor(food),
site = factor(site),
sex = factor(sex)
)
Cmax_lm2_v2 <- lm(log(Cmax) ~ dose_g_per_kg + route + food + sex + age, data = order_0_df_factor)
summary(Cmax_lm2_v2)
##
## Call:
## lm(formula = log(Cmax) ~ dose_g_per_kg + route + food + sex +
## age, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33748 -0.07599 -0.00931 0.11259 0.29027
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.149688 0.490670 0.305 0.76134
## dose_g_per_kg -0.012206 0.953701 -0.013 0.98983
## routePO -0.507512 0.075700 -6.704 6.97e-09 ***
## foodfed -0.286139 0.093412 -3.063 0.00324 **
## sexM 0.087738 0.119204 0.736 0.46449
## age 0.004959 0.001153 4.301 6.13e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1523 on 62 degrees of freedom
## (78 пропущенных наблюдений удалены)
## Multiple R-squared: 0.8574, Adjusted R-squared: 0.8459
## F-statistic: 74.58 on 5 and 62 DF, p-value: < 2.2e-16
check_model(Cmax_lm2_v1)
broom::tidy(Cmax_lm2_v2, conf.int = TRUE)
## # A tibble: 6 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.150 0.491 0.305 0.761 -0.831 1.13
## 2 dose_g_per_kg -0.0122 0.954 -0.0128 0.990 -1.92 1.89
## 3 routePO -0.508 0.0757 -6.70 0.00000000697 -0.659 -0.356
## 4 foodfed -0.286 0.0934 -3.06 0.00324 -0.473 -0.0994
## 5 sexM 0.0877 0.119 0.736 0.464 -0.151 0.326
## 6 age 0.00496 0.00115 4.30 0.0000613 0.00265 0.00726
# analysis of model linearity using Residuals vs Fitted plot
autoplot(Cmax_lm2_v2, 1, ncol = 1)
# Linearity diagnosis with ggResidpanel library
resid_panel(Cmax_lm2_v2, plots = "resid", smoother = TRUE)
## `geom_smooth()` using formula = 'y ~ x'
# Diagnosis of homoscedasticity of residuals using the Scale-Location plot
autoplot(Cmax_lm2_v2, 3, ncol = 1)
# Diagnosing the normality of residual distribution using a Normal Q-Q plot:
resid_panel(Cmax_lm2_v2, plots = c("qq", "hist"))
# Diagnosis of outliers and anomalous observations using the Residuals vs Leverage plot and Cook's distance plot
resid_panel(Cmax_lm2_v2, plots = c("lev", "cookd"))
t_max_coef <- coeftest(Cmax_lm2_v2, vcov. = vcovHC, type = "HC3") %>%
broom::tidy(conf.int = TRUE) %>%
mutate(term = c("Intercept",
"Dose (g/kg)",
"Peroral route",
"Food status: \"Fed\"",
"sex M",
"age")) %>%
rename("Variable" = term,
"β estimate" = estimate,
"Standard error" = std.error,
"Wald t-test statistics" = statistic,
"P-value" = p.value,
"95%CI low" = conf.low,
"95%CI high" = conf.high)
t_max_coef %>%
mutate(across(where(is.numeric), ~round(.x, 3))) %>%
flextable()
Variable | β estimate | Standard error | Wald t-test statistics | P-value | 95%CI low | 95%CI high |
|---|---|---|---|---|---|---|
Intercept | 0.150 | 1.258 | 0.119 | 0.906 | -2.365 | 2.665 |
Dose (g/kg) | -0.012 | 2.449 | -0.005 | 0.996 | -4.907 | 4.883 |
Peroral route | -0.508 | 0.171 | -2.971 | 0.004 | -0.849 | -0.166 |
Food status: "Fed" | -0.286 | 0.158 | -1.813 | 0.075 | -0.602 | 0.029 |
sex M | 0.088 | 0.233 | 0.377 | 0.708 | -0.378 | 0.553 |
age | 0.005 | 0.001 | 4.278 | 0.000 | 0.003 | 0.007 |
ggpredict(Cmax_lm2_v2, terms = c("age", "route"), ci_level = 0) %>% plot(show_data = TRUE, log_y = FALSE, jitter = .01) + labs(subtitle = "Adjusted R-squared: 0.8459")
## Model has log-transformed response. Back-transforming predictions to
## original response scale. Standard errors are still on the transformed
## scale.
plot_model(Cmax_lm2_v2, show.values = TRUE, show.intercept = TRUE, value.offset = 0.3, digits = 3,title = "Regression coeffecients with 95% CI") +
geom_hline(yintercept = 0, color = "gray3", size = 0.5) +
theme_nice()
log(Cmax) ~ age + lbm_kg
Cmax_lm2_v3 <- lm(Cmax ~ age + lbm_kg, data = order_0_df_factor)
summary(Cmax_lm2_v3)
##
## Call:
## lm(formula = Cmax ~ age + lbm_kg, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35699 -0.11888 -0.02071 0.09604 0.42718
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.862906 0.304755 6.113 1.82e-07 ***
## age 0.005261 0.001886 2.789 0.00762 **
## lbm_kg -0.008623 0.004188 -2.059 0.04508 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1935 on 47 degrees of freedom
## (96 пропущенных наблюдений удалены)
## Multiple R-squared: 0.36, Adjusted R-squared: 0.3328
## F-statistic: 13.22 on 2 and 47 DF, p-value: 2.784e-05
#check_model(Cmax_lm2_v1)
broom::tidy(Cmax_lm2_v3, conf.int = TRUE)
## # A tibble: 3 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.86 0.305 6.11 0.000000182 1.25 2.48
## 2 age 0.00526 0.00189 2.79 0.00762 0.00147 0.00906
## 3 lbm_kg -0.00862 0.00419 -2.06 0.0451 -0.0170 -0.000197
# analysis of model linearity using Residuals vs Fitted plot
autoplot(Cmax_lm2_v3, 1, ncol = 1)
# Linearity diagnosis with ggResidpanel library
resid_panel(Cmax_lm2_v3, plots = "resid", smoother = TRUE)
## `geom_smooth()` using formula = 'y ~ x'
# Diagnosis of homoscedasticity of residuals using the Scale-Location plot
autoplot(Cmax_lm2_v3, 3, ncol = 1)
# Diagnosing the normality of residual distribution using a Normal Q-Q plot:
resid_panel(Cmax_lm2_v3, plots = c("qq", "hist"))
# Diagnosis of outliers and anomalous observations using the Residuals vs Leverage plot and Cook's distance plot
resid_panel(Cmax_lm2_v3, plots = c("lev", "cookd"))
1.6.4. Regression models for C_0
order_0_df
## # A tibble: 146 × 21
## study_id subject_id dose_g_per_kg route infusion_duration_min food site
## <fct> <chr> <dbl> <fct> <fct> <fct> <fct>
## 1 Ammon_1996 A-IV-1 0.3 IV 60 fed venous
## 2 Ammon_1996 A-IV-10 0.3 IV 60 fed venous
## 3 Ammon_1996 A-IV-11 0.3 IV 60 fed venous
## 4 Ammon_1996 A-IV-12 0.3 IV 60 fed venous
## 5 Ammon_1996 A-IV-2 0.3 IV 60 fed venous
## 6 Ammon_1996 A-IV-3 0.3 IV 60 fed venous
## 7 Ammon_1996 A-IV-4 0.3 IV 60 fed venous
## 8 Ammon_1996 A-IV-5 0.3 IV 60 fed venous
## 9 Ammon_1996 A-IV-6 0.3 IV 60 fed venous
## 10 Ammon_1996 A-IV-7 0.3 IV 60 fed venous
## # ℹ 136 more rows
## # ℹ 14 more variables: Beta <dbl>, C_0_0_order <dbl>, V_d_0_order <dbl>,
## # AUC <dbl>, Cmax <dbl>, Tmax <dbl>, CL <dbl>, wt_kg <dbl>, sex <fct>,
## # age <dbl>, bh_cm <int>, bmi <dbl>, bsa_m2 <dbl>, lbm_kg <dbl>
C_0_0_lm1_v1 <- lm(log(C_0_0_order) ~ dose_g_per_kg + route + food + site + study_id, data = order_0_df)
summary(C_0_0_lm1_v1)
##
## Call:
## lm(formula = log(C_0_0_order) ~ dose_g_per_kg + route + food +
## site + study_id, data = order_0_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.273548 -0.053489 -0.001246 0.066086 0.283716
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.81812 0.11202 -7.303 1.14e-10 ***
## dose_g_per_kg 0.57296 0.47240 1.213 0.2284
## routePO 0.43382 0.19331 2.244 0.0273 *
## foodfed -0.04388 0.06301 -0.696 0.4879
## sitevenous NA NA NA NA
## study_idJones_1984 -0.02089 0.05113 -0.409 0.6838
## study_idThierauf_Emberger_2023 0.03276 0.07954 0.412 0.6814
## study_idWilkinson-1976-IV 0.42749 0.18760 2.279 0.0251 *
## study_idWilkinson-1976-PO NA NA NA NA
## study_idYelland_2008 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09982 on 89 degrees of freedom
## (50 пропущенных наблюдений удалены)
## Multiple R-squared: 0.8394, Adjusted R-squared: 0.8286
## F-statistic: 77.52 on 6 and 89 DF, p-value: < 2.2e-16
check_model(C_0_0_lm1_v1)
Adjusted R-squared: 0.8286
- Posterior Predictive Check
The density lines of the models and observations almost coincide. This means that the model adequately reproduces the distribution of C_0_0.
- Linearity
The green smoothed line is close to horizontal, but there is a slight systematic trend visible at the right end (with large fitted values). There is no significant violation of linearity. It is possible that there is insufficient interaction or nonlinear effect of one of the predictors.
Homogeneity of variance There is no heteroscedasticity.
Influential observations
Several influential observations are visible, marked with numbers (78, 92, 62, 95, 67)
- Normality of residuals
The Q–Q chart looks acceptable.
- Collinearty VIF dose_g_per_kg, route, study_id - High ≥ 10
I will try to remove unnecessary indicators, for example, in each study, 1 level of food.
C_0_0_lm1_v2 <- lm(log(C_0_0_order) ~ study_id, data = order_0_df)
summary(C_0_0_lm1_v2)
##
## Call:
## lm(formula = log(C_0_0_order) ~ study_id, data = order_0_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.260871 -0.053489 0.001465 0.066086 0.283716
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.69011 0.02889 -23.89 <2e-16 ***
## study_idJones_1984 0.67454 0.03230 20.88 <2e-16 ***
## study_idThierauf_Emberger_2023 0.61441 0.04285 14.34 <2e-16 ***
## study_idWilkinson-1976-IV 0.63085 0.05004 12.61 <2e-16 ***
## study_idWilkinson-1976-PO 0.65411 0.04568 14.32 <2e-16 ***
## study_idYelland_2008 0.66301 0.04086 16.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1001 on 90 degrees of freedom
## (50 пропущенных наблюдений удалены)
## Multiple R-squared: 0.8367, Adjusted R-squared: 0.8277
## F-statistic: 92.25 on 5 and 90 DF, p-value: < 2.2e-16
check_model(C_0_0_lm1_v2)
Adjusted R-squared: 0.8277 Removed collinearity.
order_0_df
## # A tibble: 146 × 21
## study_id subject_id dose_g_per_kg route infusion_duration_min food site
## <fct> <chr> <dbl> <fct> <fct> <fct> <fct>
## 1 Ammon_1996 A-IV-1 0.3 IV 60 fed venous
## 2 Ammon_1996 A-IV-10 0.3 IV 60 fed venous
## 3 Ammon_1996 A-IV-11 0.3 IV 60 fed venous
## 4 Ammon_1996 A-IV-12 0.3 IV 60 fed venous
## 5 Ammon_1996 A-IV-2 0.3 IV 60 fed venous
## 6 Ammon_1996 A-IV-3 0.3 IV 60 fed venous
## 7 Ammon_1996 A-IV-4 0.3 IV 60 fed venous
## 8 Ammon_1996 A-IV-5 0.3 IV 60 fed venous
## 9 Ammon_1996 A-IV-6 0.3 IV 60 fed venous
## 10 Ammon_1996 A-IV-7 0.3 IV 60 fed venous
## # ℹ 136 more rows
## # ℹ 14 more variables: Beta <dbl>, C_0_0_order <dbl>, V_d_0_order <dbl>,
## # AUC <dbl>, Cmax <dbl>, Tmax <dbl>, CL <dbl>, wt_kg <dbl>, sex <fct>,
## # age <dbl>, bh_cm <int>, bmi <dbl>, bsa_m2 <dbl>, lbm_kg <dbl>
C_0_0_lm2_v1 <- lm(log(C_0_0_order) ~ study_id + sex+age , data = order_0_df)
summary(C_0_0_lm2_v1) #sex and age depend on study_id, there is no point in looking at all the data
##
## Call:
## lm(formula = log(C_0_0_order) ~ study_id + sex + age, data = order_0_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27793 -0.09038 0.04045 0.09746 0.15628
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.139503 0.141562 -0.985 0.341
## study_idWilkinson-1976-PO 0.052919 0.119010 0.445 0.663
## sexM 0.025163 0.096885 0.260 0.799
## age 0.001099 0.003004 0.366 0.720
##
## Residual standard error: 0.1462 on 14 degrees of freedom
## (128 пропущенных наблюдений удалены)
## Multiple R-squared: 0.04178, Adjusted R-squared: -0.1635
## F-statistic: 0.2035 on 3 and 14 DF, p-value: 0.8923
df_sa <- order_0_df %>% filter(!is.na(sex), !is.na(age), !is.na(C_0_0_order))
# Wilkinson and Thierauf data, 18 observations
df_sa <- df_sa %>% mutate(bmi = wt_kg / (bh_cm/100)^2,
bsa_m2 = sqrt(bh_cm * wt_kg / 3600)) # bsa fits better in the model
C_0_0_lm2_v2 <- lm(log(C_0_0_order) ~ log(dose_g_per_kg) + bsa_m2 + sex, data = df_sa)
summary(C_0_0_lm2_v2)
##
## Call:
## lm(formula = log(C_0_0_order) ~ log(dose_g_per_kg) + bsa_m2 +
## sex, data = df_sa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24681 -0.08761 0.01368 0.09441 0.20278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3044 0.3613 -0.842 0.4137
## log(dose_g_per_kg) 0.9798 0.6431 1.524 0.1499
## bsa_m2 0.4939 0.2732 1.808 0.0922 .
## sexM -0.2210 0.1658 -1.333 0.2039
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.132 on 14 degrees of freedom
## Multiple R-squared: 0.2189, Adjusted R-squared: 0.05151
## F-statistic: 1.308 on 3 and 14 DF, p-value: 0.311
check_model(C_0_0_lm2_v2)
It looks better visually, but there are a lot of predictors. I calculated BMI and BSA and tried to include them in the model in various ways.
C_0_0_lm3_v1 <- lm(log(C_0_0_order) ~ bsa_m2 , data = order_0_df)
summary(C_0_0_lm3_v1)
##
## Call:
## lm(formula = log(C_0_0_order) ~ bsa_m2, data = order_0_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25349 -0.10107 0.02324 0.09509 0.17215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4567 0.3092 -1.477 0.159
## bsa_m2 0.2087 0.1611 1.296 0.213
##
## Residual standard error: 0.1329 on 16 degrees of freedom
## (128 пропущенных наблюдений удалены)
## Multiple R-squared: 0.09499, Adjusted R-squared: 0.03842
## F-statistic: 1.679 on 1 and 16 DF, p-value: 0.2134
check_model(C_0_0_lm3_v1)
1.6.5. Regression models for Vd
fit_Vd_1 <- lm(V_d_0_order ~ route + food + site, data = order_0_df_factor)
summary(fit_Vd_1)
##
## Call:
## lm(formula = V_d_0_order ~ route + food + site, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.187399 -0.045232 0.001478 0.044736 0.216100
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.61519 0.02098 29.320 < 2e-16 ***
## routePO 0.06870 0.02069 3.320 0.00129 **
## foodfed -0.01162 0.01688 -0.688 0.49290
## sitevenous NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07512 on 93 degrees of freedom
## (50 пропущенных наблюдений удалены)
## Multiple R-squared: 0.1337, Adjusted R-squared: 0.115
## F-statistic: 7.174 on 2 and 93 DF, p-value: 0.001266
cat("\n===============================================================\n")
##
## ===============================================================
fit_Vd_3 <- lm(V_d_0_order ~ route, data = order_0_df_factor)
summary(fit_Vd_3)
##
## Call:
## lm(formula = V_d_0_order ~ route, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.195744 -0.041954 0.002783 0.046007 0.207755
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.60744 0.01766 34.402 < 2e-16 ***
## routePO 0.07317 0.01959 3.735 0.000322 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07491 on 94 degrees of freedom
## (50 пропущенных наблюдений удалены)
## Multiple R-squared: 0.1292, Adjusted R-squared: 0.12
## F-statistic: 13.95 on 1 and 94 DF, p-value: 0.0003215
cat("\n===============================================================\n")
##
## ===============================================================
fit_Vd_4 <- lm(V_d_0_order ~ route + site, data = order_0_df_factor)
summary(fit_Vd_4)
##
## Call:
## lm(formula = V_d_0_order ~ route + site, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.187399 -0.045232 0.001478 0.044736 0.216100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.61519 0.02098 29.320 < 2e-16 ***
## routePO 0.06870 0.02069 3.320 0.00129 **
## sitevenous -0.01162 0.01688 -0.688 0.49290
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07512 on 93 degrees of freedom
## (50 пропущенных наблюдений удалены)
## Multiple R-squared: 0.1337, Adjusted R-squared: 0.115
## F-statistic: 7.174 on 2 and 93 DF, p-value: 0.001266
cat("\n===============================================================\n")
##
## ===============================================================
fit_Vd_5 <- lm(V_d_0_order ~ route + food, data = order_0_df_factor)
summary(fit_Vd_5)
##
## Call:
## lm(formula = V_d_0_order ~ route + food, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.187399 -0.045232 0.001478 0.044736 0.216100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.61519 0.02098 29.320 < 2e-16 ***
## routePO 0.06870 0.02069 3.320 0.00129 **
## foodfed -0.01162 0.01688 -0.688 0.49290
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07512 on 93 degrees of freedom
## (50 пропущенных наблюдений удалены)
## Multiple R-squared: 0.1337, Adjusted R-squared: 0.115
## F-statistic: 7.174 on 2 and 93 DF, p-value: 0.001266
cat("\n===============================================================\n")
##
## ===============================================================
anova(fit_Vd_1, fit_Vd_3)
## Analysis of Variance Table
##
## Model 1: V_d_0_order ~ route + food + site
## Model 2: V_d_0_order ~ route
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 93 0.52484
## 2 94 0.52752 -1 -0.0026745 0.4739 0.4929
anova(fit_Vd_1, fit_Vd_4)
## Analysis of Variance Table
##
## Model 1: V_d_0_order ~ route + food + site
## Model 2: V_d_0_order ~ route + site
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 93 0.52484
## 2 93 0.52484 0 0
anova(fit_Vd_1, fit_Vd_5)
## Analysis of Variance Table
##
## Model 1: V_d_0_order ~ route + food + site
## Model 2: V_d_0_order ~ route + food
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 93 0.52484
## 2 93 0.52484 0 0
fit_Vd_3_sub <- lm(V_d_0_order ~ 1, data = order_0_df_factor)
anova(fit_Vd_3, fit_Vd_3_sub)
## Analysis of Variance Table
##
## Model 1: V_d_0_order ~ route
## Model 2: V_d_0_order ~ 1
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 94 0.52752
## 2 95 0.60581 -1 -0.078296 13.952 0.0003215 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
check_model(fit_Vd_3)
fit_Vd_3_add <- lm(V_d_0_order ~ route + wt_kg * sex, data = order_0_df_factor)
fit_Vd_3_add2 <- lm(V_d_0_order ~ route + wt_kg + sex, data = order_0_df_factor)
anova(fit_Vd_3_add, fit_Vd_3_add2)
## Analysis of Variance Table
##
## Model 1: V_d_0_order ~ route + wt_kg * sex
## Model 2: V_d_0_order ~ route + wt_kg + sex
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 19 0.12279
## 2 20 0.12279 -1 -1.1327e-06 2e-04 0.9896
summary(fit_Vd_3_add2)
##
## Call:
## lm(formula = V_d_0_order ~ route + wt_kg + sex, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.120547 -0.051799 -0.007649 0.039997 0.182814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.817074 0.116676 7.003 8.55e-07 ***
## routePO 0.016822 0.038723 0.434 0.6686
## wt_kg -0.003918 0.001551 -2.526 0.0201 *
## sexM 0.110812 0.045764 2.421 0.0251 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07836 on 20 degrees of freedom
## (122 пропущенных наблюдений удалены)
## Multiple R-squared: 0.2998, Adjusted R-squared: 0.1948
## F-statistic: 2.855 on 3 and 20 DF, p-value: 0.06298
fit_Vd <- lm(V_d_0_order ~ wt_kg + sex, data = order_0_df_factor)
anova(fit_Vd, fit_Vd_3_add2)
## Analysis of Variance Table
##
## Model 1: V_d_0_order ~ wt_kg + sex
## Model 2: V_d_0_order ~ route + wt_kg + sex
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 21 0.12395
## 2 20 0.12279 1 0.0011586 0.1887 0.6686
summary(fit_Vd)
##
## Call:
## lm(formula = V_d_0_order ~ wt_kg + sex, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.115225 -0.054629 -0.004847 0.036033 0.187856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.836182 0.105960 7.891 1.02e-07 ***
## wt_kg -0.003953 0.001519 -2.603 0.0166 *
## sexM 0.105957 0.043513 2.435 0.0239 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07683 on 21 degrees of freedom
## (122 пропущенных наблюдений удалены)
## Multiple R-squared: 0.2932, Adjusted R-squared: 0.2259
## F-statistic: 4.356 on 2 and 21 DF, p-value: 0.02615
check_model(fit_Vd)
resid_panel(fit_Vd, plots = c("lev", "cookd"))
ggpredict(fit_Vd, terms = c("wt_kg", "sex"), ci_level = 0.95) %>% plot(show_data = TRUE, log_y = FALSE, jitter = .01) +
labs(title = "Predicted values of Vd",
subtitle = "Adjusted R-squared: 0.2259",
color = "Sex") +
xlab("Weight (kg)") +
ylab("Vd (L / kg)")
plot_model(fit_Vd, digits = 3, show.values = TRUE, show.intercept = TRUE, value.offset = 0.3, axis.labels = c("Weight (kg)", "Sex", "Intercept"), title = "Regression coeffecients with 95% CI") +
theme_nice()
1.6.6. Regression models for Beta
fit_Beta_1 <- lm(Beta ~ dose_g_per_kg * route, data = order_0_df_factor)
summary(fit_Beta_1)
##
## Call:
## lm(formula = Beta ~ dose_g_per_kg * route, data = order_0_df_factor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.138e-03 -2.014e-04 -1.855e-05 1.265e-04 8.673e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003570 0.0002436 1.465 0.146
## dose_g_per_kg 0.0035761 0.0005877 6.085 2.64e-08 ***
## routePO 0.0044200 0.0005433 8.135 1.88e-12 ***
## dose_g_per_kg:routePO -0.0073689 0.0009400 -7.840 7.76e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0003303 on 92 degrees of freedom
## (50 пропущенных наблюдений удалены)
## Multiple R-squared: 0.5181, Adjusted R-squared: 0.5024
## F-statistic: 32.97 on 3 and 92 DF, p-value: 1.466e-14
fit_Beta_1_sub <- lm(Beta ~ 1, data = order_0_df_factor)
anova(fit_Beta_1, fit_Beta_1_sub)
## Analysis of Variance Table
##
## Model 1: Beta ~ dose_g_per_kg * route
## Model 2: Beta ~ 1
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 92 1.0039e-05
## 2 95 2.0830e-05 -3 -1.0791e-05 32.966 1.466e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
check_model(fit_Beta_1)
resid_panel(fit_Beta_1, plots = c("lev", "cookd"))
ggpredict(fit_Beta_1, terms = c("dose_g_per_kg", "route"), ci_level = 0) %>% plot(show_data = TRUE, log_y = FALSE, jitter = .01) + labs(subtitle = "Adjusted R-squared: 0.5671") + xlab("Dose (g per kg)")
plot_model(fit_Beta_1, show.values = TRUE, show.intercept = TRUE, value.offset = 0.3, axis.labels = c("Dose (g per kg)", "Route (PO)", "Dose (g per kg) x Route (PO)", "Intercept"), title = "Regression coeffecients with 95% CI") +
theme_nice()